library(gridExtra)
library(knitr)
library(ggplot2)
library(fastqcr)
# Install the FastQC tool through fastqcr.
# This must be run if FastQC is not already installed locally.
# NOTE(review): as written, this call executes on every run of the script;
# presumably it can be commented out once FastQC is installed -- confirm.
fastqc_install()
### ONLY THIS CHUNK REQUIRES MODIFICATION ###
### Assign your directory locations here:

# Full path to the directory containing one .fastq.gz file per sample
fq.dir <- "/home/a619f280/work/phil.stachyris/fastq"

# Full path to the output directory where the fastqc results should be written
qc.dir <- "/home/a619f280/work/phil.stachyris/qc"
# Run fastqc from within R on the FASTQ files in fq.dir, writing results to qc.dir.
# This only needs to be run once: if you are only tweaking the downstream
# visualizations, you can comment this step out.
fastqc(
  fq.dir = fq.dir,  # directory holding the FASTQ files
  qc.dir = qc.dir,  # directory receiving the results
  threads = 4       # number of threads
)

# List the contents of the output directory to confirm that fastqc worked
list.files(qc.dir)

[1] “fastqcr.Rmd”
[2] “run.fastqcr.sh”
[3] “S_capitalis_28326_fastqc.html”
[4] “S_capitalis_28326_fastqc.zip”
[5] “S_capitalis_28338_fastqc.html”
[6] “S_capitalis_28338_fastqc.zip”
[7] “S_capitalis_28339_fastqc.html”
[8] “S_capitalis_28339_fastqc.zip”
[9] “S_capitalis_28341_fastqc.html”
[10] “S_capitalis_28341_fastqc.zip”
[11] “S_capitalis_28342_fastqc.html”
[12] “S_capitalis_28342_fastqc.zip”
[13] “S_capitalis_29959_fastqc.html”
[14] “S_capitalis_29959_fastqc.zip”
[15] “S_capitalis_29965_fastqc.html”
[16] “S_capitalis_29965_fastqc.zip”
[17] “S_capitalis_29968_fastqc.html”
[18] “S_capitalis_29968_fastqc.zip”
[19] “S_capitalis_CMNH37769_fastqc.html”
[20] “S_capitalis_CMNH37769_fastqc.zip”
[21] “S_dennistouni_19648_fastqc.html”
[22] “S_dennistouni_19648_fastqc.zip”
[23] “S_dennistouni_19656_fastqc.html”
[24] “S_dennistouni_19656_fastqc.zip”
[25] “S_dennistouni_20186_fastqc.html”
[26] “S_dennistouni_20186_fastqc.zip”
[27] “S_dennistouni_20187_fastqc.html”
[28] “S_dennistouni_20187_fastqc.zip”
[29] “S_dennistouni_20188_fastqc.html”
[30] “S_dennistouni_20188_fastqc.zip”
[31] “S_dennistouni_20191_fastqc.html”
[32] “S_dennistouni_20191_fastqc.zip”
[33] “S_dennistouni_20201_fastqc.html”
[34] “S_dennistouni_20201_fastqc.zip”
[35] “S_dennistouni_20222_fastqc.html”
[36] “S_dennistouni_20222_fastqc.zip”
[37] “S_dennistouni_20224_fastqc.html”
[38] “S_dennistouni_20224_fastqc.zip”
[39] “S_dennistouni_20225_fastqc.html”
[40] “S_dennistouni_20225_fastqc.zip”
[41] “S_dennistouni_20229_fastqc.html”
[42] “S_dennistouni_20229_fastqc.zip”
[43] “S_dennistouni_20234_fastqc.html”
[44] “S_dennistouni_20234_fastqc.zip”
[45] “S_dennistouni_20335_fastqc.html”
[46] “S_dennistouni_20335_fastqc.zip”
[47] “S_dennistouni_21084_fastqc.html”
[48] “S_dennistouni_21084_fastqc.zip”
[49] “S_dennistouni_21086_fastqc.html”
[50] “S_dennistouni_21086_fastqc.zip”
[51] “S_dennistouni_21112_fastqc.html”
[52] “S_dennistouni_21112_fastqc.zip”
[53] “S_dennistouni_25696_fastqc.html”
[54] “S_dennistouni_25696_fastqc.zip”
[55] “S_dennistouni_25702_fastqc.html”
[56] “S_dennistouni_25702_fastqc.zip”
[57] “S_dennistouni_25703_fastqc.html”
[58] “S_dennistouni_25703_fastqc.zip”
[59] “S_dennistouni_25713_fastqc.html”
[60] “S_dennistouni_25713_fastqc.zip”
[61] “S_dennistouni_25716_fastqc.html”
[62] “S_dennistouni_25716_fastqc.zip”
[63] “S_dennistouni_25743_fastqc.html”
[64] “S_dennistouni_25743_fastqc.zip”
[65] “S_dennistouni_25817_fastqc.html”
[66] “S_dennistouni_25817_fastqc.zip”
[67] “S_dennistouni_25828_fastqc.html”
[68] “S_dennistouni_25828_fastqc.zip”
[69] “S_dennistouni_25829_fastqc.html”
[70] “S_dennistouni_25829_fastqc.zip”
[71] “S_dennistouni_25846_fastqc.html”
[72] “S_dennistouni_25846_fastqc.zip”
[73] “S_dennistouni_25885_fastqc.html”
[74] “S_dennistouni_25885_fastqc.zip”
[75] “S_dennistouni_25898_fastqc.html”
[76] “S_dennistouni_25898_fastqc.zip”
[77] “S_dennistouni_25903_fastqc.html”
[78] “S_dennistouni_25903_fastqc.zip”
[79] “S_dennistouni_25908_fastqc.html”
[80] “S_dennistouni_25908_fastqc.zip”
[81] “S_dennistouni_25939_fastqc.html”
[82] “S_dennistouni_25939_fastqc.zip”
[83] “S_dennistouni_25950_fastqc.html”
[84] “S_dennistouni_25950_fastqc.zip”
[85] “S_dennistouni_26573_fastqc.html”
[86] “S_dennistouni_26573_fastqc.zip”
[87] “S_dennistouni_26579_fastqc.html”
[88] “S_dennistouni_26579_fastqc.zip”
[89] “S_dennistouni_26961_fastqc.html”
[90] “S_dennistouni_26961_fastqc.zip”
[91] “S_dennistouni_CMNH38201_fastqc.html”
[92] “S_dennistouni_CMNH38201_fastqc.zip”
[93] “S_nigrocapitata_14192_fastqc.html”
[94] “S_nigrocapitata_14192_fastqc.zip”
[95] “S_nigrocapitata_14199_fastqc.html”
[96] “S_nigrocapitata_14199_fastqc.zip”
[97] “S_nigrocapitata_18034_fastqc.html”
[98] “S_nigrocapitata_18034_fastqc.zip”
[99] “S_nigrocapitata_18040_fastqc.html”
[100] “S_nigrocapitata_18040_fastqc.zip”
[101] “S_nigrocapitata_18083_fastqc.html”
[102] “S_nigrocapitata_18083_fastqc.zip”
[103] “S_nigrocapitata_25550_fastqc.html”
[104] “S_nigrocapitata_25550_fastqc.zip”
[105] “S_nigrocapitata_25551_fastqc.html”
[106] “S_nigrocapitata_25551_fastqc.zip”
[107] “S_nigrocapitata_28214_fastqc.html”
[108] “S_nigrocapitata_28214_fastqc.zip”
[109] “S_nigrocapitata_28215_fastqc.html”
[110] “S_nigrocapitata_28215_fastqc.zip”
[111] “S_nigrocapitata_33030_fastqc.html”
[112] “S_nigrocapitata_33030_fastqc.zip”
[113] “S_nigrocapitata_33060_fastqc.html”
[114] “S_nigrocapitata_33060_fastqc.zip”
[115] “S_nigrocapitata_FMNH472765_fastqc.html”
[116] “S_nigrocapitata_FMNH472765_fastqc.zip”
[117] “S_plateni_19056_fastqc.html”
[118] “S_plateni_19056_fastqc.zip”
[119] “S_plateni_28305_fastqc.html”
[120] “S_plateni_28305_fastqc.zip”
[121] “S_plateni_28350_fastqc.html”
[122] “S_plateni_28350_fastqc.zip”
[123] “S_whiteheadi_18001_fastqc.html”
[124] “S_whiteheadi_18001_fastqc.zip”
[125] “S_whiteheadi_20988_fastqc.html”
[126] “S_whiteheadi_20988_fastqc.zip”
[127] “slurm-36787160.out”
[128] “slurm-36787164.out”
[129] “slurm-36787165.out”
[130] “slurm-36807798.out”
[131] "Stach__FMNH449754_fastqc.html"
[132] "Stach__FMNH449754_fastqc.zip"
[133] "Stach__FMNH449756_fastqc.html"
[134] "Stach__FMNH449756_fastqc.zip"

# Create a character vector where each value is the full path to a .zip
# created by fastqc() for a given sample.
# `pattern` is a regular expression (not a shell glob), so the extension is
# matched with an escaped dot and anchored to the end of the file name; this
# avoids picking up unrelated files that merely contain "zip" in their name.
samps <- list.files(qc.dir, full.names = TRUE, pattern = "\\.zip$")

# For every sample, print a header, the basic-statistics table, and a
# side-by-side pair of QC plots.
for (zip.path in samps) {
  # load the QC results stored in the .zip generated in the previous step
  samp.info <- qc_read(zip.path)

  # build the three QC views for this sample
  basic.stats <- qc_plot(samp.info, "Basic statistics")
  quality.plot <- qc_plot(samp.info, "Per sequence quality scores")
  duplication.plot <- qc_plot(samp.info, "Sequence duplication levels")

  # header naming the sample (file name only, directory part stripped)
  print(paste0("QC results for sample ", basename(zip.path)))
  cat('\n')

  # table view
  print(kable(basic.stats))
  cat('\n')

  # plot view: quality scores on the left, duplication levels on the right
  grid.arrange(quality.plot, duplication.plot, ncol = 2)

  # drop the per-sample objects before moving on to the next sample
  rm(basic.stats, quality.plot, duplication.plot)
}

[1] “QC results for sample S_capitalis_28326_fastqc.zip”

Measure Value
Filename S_capitalis_28326.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2271883
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_capitalis_28338_fastqc.zip”

Measure Value
Filename S_capitalis_28338.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 7353870
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_capitalis_28339_fastqc.zip”

Measure Value
Filename S_capitalis_28339.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4011414
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_capitalis_28341_fastqc.zip”

Measure Value
Filename S_capitalis_28341.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1289015
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_capitalis_28342_fastqc.zip”

Measure Value
Filename S_capitalis_28342.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2393217
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_capitalis_29959_fastqc.zip”

Measure Value
Filename S_capitalis_29959.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2212314
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_capitalis_29965_fastqc.zip”

Measure Value
Filename S_capitalis_29965.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1115542
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_capitalis_29968_fastqc.zip”

Measure Value
Filename S_capitalis_29968.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 7389825
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_capitalis_CMNH37769_fastqc.zip”

Measure Value
Filename S_capitalis_CMNH37769.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3199634
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_19648_fastqc.zip”

Measure Value
Filename S_dennistouni_19648.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 617100
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_19656_fastqc.zip”

Measure Value
Filename S_dennistouni_19656.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 8983624
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_20186_fastqc.zip”

Measure Value
Filename S_dennistouni_20186.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3219997
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_20187_fastqc.zip”

Measure Value
Filename S_dennistouni_20187.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3879876
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_20188_fastqc.zip”

Measure Value
Filename S_dennistouni_20188.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3825147
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_20191_fastqc.zip”

Measure Value
Filename S_dennistouni_20191.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3882279
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_20201_fastqc.zip”

Measure Value
Filename S_dennistouni_20201.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 215019
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_20222_fastqc.zip”

Measure Value
Filename S_dennistouni_20222.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 7410303
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_20224_fastqc.zip”

Measure Value
Filename S_dennistouni_20224.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4820610
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_20225_fastqc.zip”

Measure Value
Filename S_dennistouni_20225.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 229061
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] “QC results for sample S_dennistouni_20229_fastqc.zip”

Measure Value
Filename S_dennistouni_20229.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2196690
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_20234_fastqc.zip”

Measure Value
Filename S_dennistouni_20234.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 67716
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] “QC results for sample S_dennistouni_20335_fastqc.zip”

Measure Value
Filename S_dennistouni_20335.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 354359
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_21084_fastqc.zip”

Measure Value
Filename S_dennistouni_21084.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2795688
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_21086_fastqc.zip”

Measure Value
Filename S_dennistouni_21086.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 14622
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_21112_fastqc.zip”

Measure Value
Filename S_dennistouni_21112.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3352183
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_25696_fastqc.zip”

Measure Value
Filename S_dennistouni_25696.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3189487
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_25702_fastqc.zip”

Measure Value
Filename S_dennistouni_25702.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4377719
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_25703_fastqc.zip”

Measure Value
Filename S_dennistouni_25703.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4087648
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_25713_fastqc.zip”

Measure Value
Filename S_dennistouni_25713.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 14815
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] “QC results for sample S_dennistouni_25716_fastqc.zip”

Measure Value
Filename S_dennistouni_25716.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2043166
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] “QC results for sample S_dennistouni_25743_fastqc.zip”

Measure Value
Filename S_dennistouni_25743.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 404420
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_25817_fastqc.zip”

Measure Value
Filename S_dennistouni_25817.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3473626
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_25828_fastqc.zip”

Measure Value
Filename S_dennistouni_25828.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 558011
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_25829_fastqc.zip”

Measure Value
Filename S_dennistouni_25829.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 16862
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] “QC results for sample S_dennistouni_25846_fastqc.zip”

Measure Value
Filename S_dennistouni_25846.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 290010
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_25885_fastqc.zip”

Measure Value
Filename S_dennistouni_25885.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1345393
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_25898_fastqc.zip”

Measure Value
Filename S_dennistouni_25898.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1077889
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] “QC results for sample S_dennistouni_25903_fastqc.zip”

Measure Value
Filename S_dennistouni_25903.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2987903
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_25908_fastqc.zip”

Measure Value
Filename S_dennistouni_25908.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1829975
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] “QC results for sample S_dennistouni_25939_fastqc.zip”

Measure Value
Filename S_dennistouni_25939.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2193303
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_25950_fastqc.zip”

Measure Value
Filename S_dennistouni_25950.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 26200
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] “QC results for sample S_dennistouni_26573_fastqc.zip”

Measure Value
Filename S_dennistouni_26573.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2038928
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_26579_fastqc.zip”

Measure Value
Filename S_dennistouni_26579.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2338750
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_26961_fastqc.zip”

Measure Value
Filename S_dennistouni_26961.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1899146
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_dennistouni_CMNH38201_fastqc.zip”

Measure Value
Filename S_dennistouni_CMNH38201.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2945
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] “QC results for sample S_nigrocapitata_14192_fastqc.zip”

Measure Value
Filename S_nigrocapitata_14192.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 507426
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_nigrocapitata_14199_fastqc.zip”

Measure Value
Filename S_nigrocapitata_14199.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1323513
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_nigrocapitata_18034_fastqc.zip”

Measure Value
Filename S_nigrocapitata_18034.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2180339
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_nigrocapitata_18040_fastqc.zip”

Measure Value
Filename S_nigrocapitata_18040.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 5774993
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_nigrocapitata_18083_fastqc.zip”

Measure Value
Filename S_nigrocapitata_18083.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3303412
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_nigrocapitata_25550_fastqc.zip”

Measure Value
Filename S_nigrocapitata_25550.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 193983
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_nigrocapitata_25551_fastqc.zip”

Measure Value
Filename S_nigrocapitata_25551.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 717
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] “QC results for sample S_nigrocapitata_28214_fastqc.zip”

Measure Value
Filename S_nigrocapitata_28214.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 9305
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] “QC results for sample S_nigrocapitata_28215_fastqc.zip”

Measure Value
Filename S_nigrocapitata_28215.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 102531
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_nigrocapitata_33030_fastqc.zip”

Measure Value
Filename S_nigrocapitata_33030.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1031722
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_nigrocapitata_33060_fastqc.zip”

Measure Value
Filename S_nigrocapitata_33060.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 322869
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_nigrocapitata_FMNH472765_fastqc.zip”

Measure Value
Filename S_nigrocapitata_FMNH472765.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4399
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_plateni_19056_fastqc.zip”

Measure Value
Filename S_plateni_19056.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1818956
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_plateni_28305_fastqc.zip”

Measure Value
Filename S_plateni_28305.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1826744
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_plateni_28350_fastqc.zip”

Measure Value
Filename S_plateni_28350.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 944520
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_whiteheadi_18001_fastqc.zip”

Measure Value
Filename S_whiteheadi_18001.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2546312
Sequences flagged as poor quality 0
Sequence length 95
%GC 37

[1] “QC results for sample S_whiteheadi_20988_fastqc.zip”

Measure Value
Filename S_whiteheadi_20988.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 83409
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] "QC results for sample Stach__FMNH449754_fastqc.zip"

Measure Value
Filename Stach__FMNH449754.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 31563
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

[1] "QC results for sample Stach__FMNH449756_fastqc.zip"

Measure Value
Filename Stach__FMNH449756.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 13154
Sequences flagged as poor quality 0
Sequence length 95
%GC 36

# Aggregate the reports by pointing this function to the folder holding the
# output of fastqc(). The progress bar is switched off with an explicit FALSE
# (the shorthand `F` is an ordinary variable that can be reassigned).
qc <- qc_aggregate(qc.dir, progressbar = FALSE)

# Per-sample statistics, rendered as a table
knitr::kable(qc_stats(qc))
sample pct.dup pct.gc tot.seq seq.length
S_capitalis_28326.fq.gz 94.82 37 2271883 95
S_capitalis_28338.fq.gz 96.69 37 7353870 95
S_capitalis_28339.fq.gz 96.02 37 4011414 95
S_capitalis_28341.fq.gz 95.03 37 1289015 95
S_capitalis_28342.fq.gz 95.86 37 2393217 95
S_capitalis_29959.fq.gz 95.63 37 2212314 95
S_capitalis_29965.fq.gz 94.54 37 1115542 95
S_capitalis_29968.fq.gz 96.98 37 7389825 95
S_capitalis_CMNH37769.fq.gz 96.08 37 3199634 95
S_dennistouni_19648.fq.gz 93.81 37 617100 95
S_dennistouni_19656.fq.gz 97.04 37 8983624 95
S_dennistouni_20186.fq.gz 95.70 37 3219997 95
S_dennistouni_20187.fq.gz 96.16 37 3879876 95
S_dennistouni_20188.fq.gz 95.83 37 3825147 95
S_dennistouni_20191.fq.gz 96.01 37 3882279 95
S_dennistouni_20201.fq.gz 90.02 37 215019 95
S_dennistouni_20222.fq.gz 96.82 37 7410303 95
S_dennistouni_20224.fq.gz 96.57 37 4820610 95
S_dennistouni_20225.fq.gz 89.02 36 229061 95
S_dennistouni_20229.fq.gz 96.12 37 2196690 95
S_dennistouni_20234.fq.gz 81.53 36 67716 95
S_dennistouni_20335.fq.gz 92.02 37 354359 95
S_dennistouni_21084.fq.gz 96.33 37 2795688 95
S_dennistouni_21086.fq.gz 69.61 37 14622 95
S_dennistouni_21112.fq.gz 95.58 37 3352183 95
S_dennistouni_25696.fq.gz 95.62 37 3189487 95
S_dennistouni_25702.fq.gz 96.20 37 4377719 95
S_dennistouni_25703.fq.gz 96.61 37 4087648 95
S_dennistouni_25713.fq.gz 54.52 36 14815 95
S_dennistouni_25716.fq.gz 95.89 36 2043166 95
S_dennistouni_25743.fq.gz 92.25 37 404420 95
S_dennistouni_25817.fq.gz 95.53 37 3473626 95
S_dennistouni_25828.fq.gz 93.21 37 558011 95
S_dennistouni_25829.fq.gz 75.00 36 16862 95
S_dennistouni_25846.fq.gz 90.57 37 290010 95
S_dennistouni_25885.fq.gz 93.69 37 1345393 95
S_dennistouni_25898.fq.gz 94.96 36 1077889 95
S_dennistouni_25903.fq.gz 96.20 37 2987903 95
S_dennistouni_25908.fq.gz 95.65 36 1829975 95
S_dennistouni_25939.fq.gz 95.28 37 2193303 95
S_dennistouni_25950.fq.gz 65.93 36 26200 95
S_dennistouni_26573.fq.gz 95.68 37 2038928 95
S_dennistouni_26579.fq.gz 95.24 37 2338750 95
S_dennistouni_26961.fq.gz 95.02 37 1899146 95
S_dennistouni_CMNH38201.fq.gz 25.91 36 2945 95
S_nigrocapitata_14192.fq.gz 92.51 37 507426 95
S_nigrocapitata_14199.fq.gz 93.96 37 1323513 95
S_nigrocapitata_18034.fq.gz 95.47 37 2180339 95
S_nigrocapitata_18040.fq.gz 96.62 37 5774993 95
S_nigrocapitata_18083.fq.gz 96.28 37 3303412 95
S_nigrocapitata_25550.fq.gz 89.71 37 193983 95
S_nigrocapitata_25551.fq.gz 66.95 36 717 95
S_nigrocapitata_28214.fq.gz 75.83 36 9305 95
S_nigrocapitata_28215.fq.gz 83.63 37 102531 95
S_nigrocapitata_33030.fq.gz 94.38 37 1031722 95
S_nigrocapitata_33060.fq.gz 90.97 37 322869 95
S_nigrocapitata_FMNH472765.fq.gz 53.81 37 4399 95
S_plateni_19056.fq.gz 95.50 37 1818956 95
S_plateni_28305.fq.gz 95.89 37 1826744 95
S_plateni_28350.fq.gz 90.48 37 944520 95
S_whiteheadi_18001.fq.gz 95.83 37 2546312 95
S_whiteheadi_20988.fq.gz 70.18 36 83409 95
Stach__FMNH449754.fq.gz 69.15 36 31563 95
Stach__FMNH449756.fq.gz 70.15 36 13154 95

solid red line = median sample value

dashed red line = 10% of median sample value

# Keep the per-sample statistics as an object for the plots and tables below
stats.info <- qc_stats(qc)

# Convert the total read count column to numeric
stats.info[["tot.seq"]] <- as.numeric(stats.info[["tot.seq"]])

# Histogram of the number of sequencing reads per sample.
#   stats: data frame with a numeric `tot.seq` column (one row per sample)
#   bins:  number of histogram bins
# Returns a ggplot object with two vertical reference lines:
#   solid red line  = median sample value
#   dashed red line = 10% of median sample value
plot_read_histogram <- function(stats, bins) {
  # Compute the median once and pass it as a constant: mapping it inside
  # aes() would repeat the same value for every row of `stats`.
  median.reads <- median(stats$tot.seq)

  ggplot(stats, aes(x = tot.seq)) +
    geom_histogram(color = "black", fill = "white", bins = bins) +
    geom_vline(xintercept = median.reads, color = "red") +
    geom_vline(xintercept = median.reads * 0.1, color = "red",
               linetype = "dashed") +
    theme_classic() +
    xlab("Number of sequencing reads")
}

# coarse view of the read-count distribution
plot_read_histogram(stats.info, bins = 20)

# fine-grained view of the same distribution
plot_read_histogram(stats.info, bins = 200)

# Report the median read count and the cutoff (10% of the median) below which
# samples should be dropped immediately.
median.reads <- median(stats.info$tot.seq)
read.cutoff <- median.reads * .1
print(paste(
  "Median sample contains", median.reads,
  "reads. The following samples contain less than", read.cutoff,
  "reads (10% of the median), and should likely be dropped"
))

[1] “Median sample contains 1828359.5 reads. The following samples contain less than 182835.95 reads (10% of the median), and should likely be dropped”

# Table of the samples whose read count is below 10% of the median sample
low.read.cutoff <- median(stats.info$tot.seq) * .1
is.low.read <- stats.info$tot.seq < low.read.cutoff
knitr::kable(stats.info[is.low.read, ])
sample pct.dup pct.gc tot.seq seq.length
S_dennistouni_20234.fq.gz 81.53 36 67716 95
S_dennistouni_21086.fq.gz 69.61 37 14622 95
S_dennistouni_25713.fq.gz 54.52 36 14815 95
S_dennistouni_25829.fq.gz 75.00 36 16862 95
S_dennistouni_25950.fq.gz 65.93 36 26200 95
S_dennistouni_CMNH38201.fq.gz 25.91 36 2945 95
S_nigrocapitata_25551.fq.gz 66.95 36 717 95
S_nigrocapitata_28214.fq.gz 75.83 36 9305 95
S_nigrocapitata_28215.fq.gz 83.63 37 102531 95
S_nigrocapitata_FMNH472765.fq.gz 53.81 37 4399 95
S_whiteheadi_20988.fq.gz 70.18 36 83409 95
Stach__FMNH449754.fq.gz 69.15 36 31563 95
Stach__FMNH449756.fq.gz 70.15 36 13154 95